#-------------------------------------------------------#
#   Quality Assessment of the Academic Freedom Index    #
#-------------------------------------------------------#

# Title: "Quality Assessment of the Academic Freedom Index: Strengths, Weaknesses, and How Best to Use It" 
# Authors: "Lott, Lars", "Spannagel, Janika"
# date: 2024-09-19
# journal: Perspectives on Politics
# DOI: TBA
# written under "R and RStudio (4.4.1)"

#### Preliminaries ####

R.version$version.string

# clear workspace
rm(list=ls())

# load packages
library(here)
library(tidyverse)
library(ggpubr)
library(countrycode)
library(ggrepel)
library(ggthemes)
library(lubridate)
library(estimatr)
library(texreg)
library(readxl)
library(haven)
library(scales)
library(fixest)

set.seed(1234)

#---------------------------------------------------#
#--------------------load data ---------------------#
#---------------------------------------------------#

## Load Academic Freedom Index Posteriors ##

posteriors_afi <-readRDS(file.path("results", "thin_post", "thin_post_academic_freedom_index.rds")) 

# load country_aggregated data
vdem_full_v13 <- readRDS(file.path("data", "vdem_v13", "vdem_13_cy", "V-Dem-CY-Full+Others-v13.rds"))

# Construct a baseline and small cy-dataset #

basic_df <- vdem_full_v13 %>%
  dplyr::select(country_text_id, year, country_name, e_polity2, e_pop, e_democracy_trans, e_democ)  %>%
  mutate(dem_transition = ifelse(e_democracy_trans==1, 1, 0))

summary(basic_df)

posteriors_afi <- as.data.frame(posteriors_afi)
posteriors_afi$country_date <- rownames(posteriors_afi)
posteriors_afi <- posteriors_afi %>%
  dplyr::select(country_date, everything())

## Step 1: Prepare posterior data - extract country_text_id and historical date var ##

posteriors_afi <- posteriors_afi %>%
  mutate(country_text_id = as.character(map(str_split(country_date, " "), 1)), 
         historical_date = as.character(map(str_split(country_date, " "), 2)))
  
posteriors_afi <- posteriors_afi %>%
  dplyr::select(country_date,country_text_id, historical_date, everything()) %>%
  mutate(historical_date_date_format = as_date(historical_date))

posteriors_afi <- posteriors_afi %>%
  mutate(historical_date_date_format = as_date(historical_date), 
         year = year(historical_date_date_format), 
         month = month(historical_date_date_format), 
         day = day(historical_date_date_format)) %>%
  dplyr::select(country_date,country_text_id, historical_date,historical_date_date_format, year, everything())  %>%
  filter(month ==12, day == 31)  %>%
  dplyr::select(-historical_date_date_format, -historical_date, -month, -day)


## Step 2: Merge datasets by country_text_id and year ##
merged_data <- merge(basic_df, posteriors_afi, by = c("country_text_id", "year"))

## Step 3: Create list with assigned columns ##
basic_df_posterior <- lapply(1:900, function(i) {
  merged_data[, c("country_text_id", "year", "country_name", "e_polity2", "e_pop", "e_democracy_trans", 
                  "e_democ", "dem_transition",  paste0("V", i))]
})

## Use map to iterate over each dataframe in the list and rename the variable ##
basic_df_posterior <- map(basic_df_posterior, ~{
  # Rename the variable
  names(.x)[9] <- "v2xca_academ"
  return(.x)
})

rm(merged_data, posteriors_afi)

## Step 4 Structure data with purr and map ##

all_imputations <- bind_rows(unclass(basic_df_posterior), .id = "m") %>%
  group_by(m) %>%
  nest()

## Step 5: run models with map and return tidy summaries of those models directly in a data frame
models_imputations <- all_imputations %>%
  mutate(model = data %>% map(~ feols(v2xca_academ ~ l(dem_transition, 1) | country_text_id+year , panel.id = ~country_text_id+year, data =.)),
         tidied = model %>% map(~ tidy(., conf.int = TRUE)),
         glance = model %>% map(~ glance(.)))

models_imputations


models_imputations %>%
  filter(m == "1") %>%
  unnest(tidied)

## Create a wide data frame of the coefficients, standard errors, p-Values, and confidence intervals ##

model_m2_params <- models_imputations %>%
  unnest(tidied) %>%
  dplyr::select(m, term, estimate, std.error, p.value, conf.low, conf.high) %>%
  gather(key, value, estimate, std.error, p.value, conf.low, conf.high)

data_m1 <- all_imputations %>% filter(m=="1") %>% unnest()
m1 <- feols(v2xca_academ ~ l(dem_transition, 1) | country_text_id+year , panel.id = ~country_text_id+year, data =data_m1)
model_degree_freedom <- degrees_freedom(m1, type = "resid")

model_m2_params_sum <- model_m2_params %>%
  filter(key == "estimate" | key == "std.error") %>%
  pivot_wider(names_from = key, values_from = value) %>%
  group_by(term) %>%
  summarize(Estimate = mean(estimate),
            se.within = mean(std.error), 
            se.between = var(estimate), 
            se.est = sqrt(se.within^2 + se.between*(1 + (1/length(models_imputations$model))))) %>%
  mutate(model = "Model 2 - Uncertainty in main explanatory variable") %>%
  mutate(statistic = Estimate / se.est,
         conf.low = Estimate + se.est * qt(0.025, model_degree_freedom),
         conf.high = Estimate + se.est * qt(0.975, model_degree_freedom),
         p.value = 2 * pt(abs(statistic), model_degree_freedom, lower.tail = FALSE))

model_m2_params_sum$model <- "Model 2 - With Uncertainty"

##Baseline Model without Uncertainty ##

# Construct a baseline and small cy-dataset #

ds.basic <- vdem_full_v13 %>%
  dplyr::select(country_text_id, year, country_name, e_polity2, e_pop, e_democracy_trans, e_democ, v2xca_academ)  %>%
  mutate(dem_transition = ifelse(e_democracy_trans==1, 1, 0))

m1 <- feols(v2xca_academ ~ l(dem_transition, 1) | country_text_id+year , panel.id = ~country_text_id+year, data =ds.basic)

m1_estimate <- m1 %>% tidy(., conf.int = TRUE)
  
m1_estimate$model <- "Model 1 - Without Uncertainty"

model_m2_params_sum <- model_m2_params_sum %>%
  dplyr::select(-se.within, -se.between) %>%
  rename(estimate = Estimate, 
         std.error = se.est)
data_figure <- rbind(m1_estimate, model_m2_params_sum)

color_pal <- c("#2980b9", "#d35400")

f2 <- ggplot(data = data_figure, aes(y = model, color = model)) +
  geom_vline(xintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(aes(xmin = conf.low,
                     xmax = conf.high), size = 1.5) +
  geom_point(aes(x = estimate), size = 3, color = "black", shape = 19) +
  geom_point(aes(x = estimate), size = 1, color = "white", shape = 19) +
    annotate("text", x=0.0834, y= 1-0.15 , label= "Model 1 - No Uncertainty in DV", color = "black") + 
  annotate("text", x=0.0853, y= 2-0.15 , label= "Model 2 - Uncertainty in DV", color = "black") + 
  labs(title="",x="Estimated Coefficients", y = "", color = "") +
  theme_bw() +
  theme(legend.position="NONE")  +
  xlim(-0.05, 0.15) +
  theme(axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank()) +
  scale_color_manual(values = color_pal) 

f2 
ggsave("outputs_original_data/Figure_K1.pdf", width = 20,
       height = 15,
       units = c("cm"), dpi = 1000)
ggsave("outputs_original_data/Figure_K1.png", width = 20,
       height = 15,
       units = c("cm"), dpi = 1000)


